clear
capture log close
local path "D:\BJP\WageShocks\ReplicationFiles"
log using `path'\LogFiles\OES_CleanUp.log, replace

*********************************************************************************
*************** Bring in 1999-2009 OES Data into Stata **************************
*********************************************************************************

import excel using `path'\RawData\OESExcelFiles\state_M1999_dl.xls, firstrow clear
keep state occ_code occ_title tot_emp h_mean
rename (occ_code occ_title h_mean) (onetsoccode_2000 description_2000 ave_hourly_wage)
gen str year = "1999"
save `path'\FormattedData\temp\OES_data_1999_2009.dta, replace

local first_yr = 0
local last_yr = 9

forvalues year = `first_yr'/`last_yr'{    
	if `year' <= 2{
		import excel using `path'\RawData\OESExcelFiles\state_M200`year'_dl.xls, firstrow clear
		keep state occ_code occ_title tot_emp h_mean
		rename (occ_code occ_title h_mean) (onetsoccode_2000 description_2000 ave_hourly_wage)
		gen year = "200`year'"
		append using `path'\FormattedData\temp\OES_data_1999_2009.dta, force
		save `path'\FormattedData\temp\OES_data_1999_2009.dta, replace
	}
	else{
		import excel using `path'\RawData\OESExcelFiles\state_M200`year'_dl.xls, firstrow clear
		keep STATE OCC_CODE OCC_TITLE TOT_EMP H_MEAN
		rename (STATE OCC_CODE OCC_TITLE TOT_EMP H_MEAN) (state onetsoccode_2000 description_2000 tot_emp ave_hourly_wage)
		gen year = "200`year'"
		append using `path'\FormattedData\temp\OES_data_1999_2009.dta, force
		save `path'\FormattedData\temp\OES_data_1999_2009.dta, replace
    }
}

replace onetsoccode_2000 = subinstr(onetsoccode_2000, "-", "",.)
destring tot_emp ave_hourly year, replace force
replace onetsoccode_2000 = strtrim(onetsoccode_2000)
replace description_2000 = strtrim(description_2000)
sort onetsoccode_2000
rename (onetsoccode_2000 description_2000) (onetsoccode description)
destring onetsoccode, replace 

drop if state=="Guam" | state=="Puerto Rico" | state=="Virgin Islands"
drop if tot_emp==.
save `path'\FormattedData\temp\OES_data_1999_2009.dta, replace

*********************************************************************************
*************** Bring in 2010-2018 OES Data into Stata **************************
*********************************************************************************

local first_yr = 10
local last_yr = 18

forvalues year = `first_yr'/`last_yr'{
    
	if `year' <= 13{
		import excel using `path'\RawData\OESExcelFiles\state_M20`year'_dl.xls, firstrow clear
	}
	else{
		import excel using `path'\RawData\OESExcelFiles\state_M20`year'_dl.xlsx, firstrow clear
	}
	
	keep STATE OCC_CODE OCC_TITLE TOT_EMP H_MEAN
	rename (STATE OCC_CODE OCC_TITLE TOT_EMP H_MEAN) (state onetsoccode_2010 description_2010 tot_emp ave_hourly_wage)
	gen year = "20`year'"
	
    if `year' == `first_yr'{
		save `path'\FormattedData\temp\OES_data_20`first_yr'_20`last_yr'.dta, replace
    }
    else{
		append using `path'\FormattedData\temp\OES_data_20`first_yr'_20`last_yr'.dta, force
		save `path'\FormattedData\temp\OES_data_20`first_yr'_20`last_yr'.dta, replace
    }
}

replace onetsoccode_2010 = subinstr(onetsoccode_2010, "-", "",.)
destring tot_emp ave_hourly year, replace force
replace onetsoccode_2010 = strtrim(onetsoccode_2010)
replace description_2010 = strtrim(description_2010)
sort onetsoccode_2010

*******************************************************************************
** Merge in Occupation Xwalk into 2010-2018 OES (which use 2010 SOC) to make **
** occupations based on 2000 SOC. Then, combine the few 2010 SOC codes that ***
** were broken out from a single 2000 SOC code. This is important because *****
** the task data are based on 2000 SOC. ***************************************
*******************************************************************************

merge m:1 onetsoccode_2010 using `path'\RawData\OES_OverTime_occ_xwalk
keep if _merge==3
drop _merge *2010
rename (onetsoccode_2000 description_2000) (onetsoccode description)
drop if onetsoccode == ""

drop if tot_emp ==.
drop if state=="Guam" | state=="Puerto Rico" | state=="Virgin Islands"
drop if ave_hourly_wage == .
sort year state onetsoccode

** Combine the few 2010 occs that were broken out into multiple occs from single 2000 SOC **
egen occ_tot_emp=sum(tot_emp), by(onetsoccode state year)
gen sh1=tot_emp/occ_tot_emp
gen t_wage=sh1*ave_hourly
egen ave_hourly2=sum(t_wage), by(onetsoccode state year)
drop ave_hourly_w tot_emp sh1 t_wage
rename (ave_hourly2 occ_tot_emp) (ave_hourly_wage tot_emp)
sort onetsoccode state year

collapse (mean) tot_emp ave, by(onetsoccode description state year)
destring onetsoccode, replace force

sort year state onetsoccode
save `path'\FormattedData\temp\OES_data_20`first_yr'_20`last_yr'.dta, replace
clear


*********************************************************
*************** Prep Minimum Wage Data ******************
*********************************************************

use `path'\RawData\monthly_mw_data, clear

keep if month== 5 
rename statename state
rename mean_mw mw

sort state year
local lag_name "lmw l2mw l3mw l4mw"
forvalues i = 1/4{
    local name : word `i' of `lag_name'
	gen `name' = mw[_n -`i'] if state == state[_n - `i']
}

local lead_name "fmw f2mw f3mw"
forvalues i = 1/3{
    local name : word `i' of `lead_name'
	gen `name' = mw[_n +`i'] if state == state[_n + `i']
}

keep if year >= 1999 & year<2019
keep state year lmw l2mw l3mw l4mw fmw f2mw f3mw mw

sort state year
gen MW_Ch_Next_Yr=ln(fmw)-ln(mw)
gen MW_Ch_This_Yr=ln(mw)-ln(lmw)
gen MW_Ch_Last_Yr=ln(lmw)-ln(l2mw)
gen MW_Ch_2Yrs_Ago=ln(l2mw)-ln(l3mw)

** Drop inflation-based adjustment states **
**gen inf_adj_state=(state=="Arizona" | state=="Colorado" | state=="Connecticut" | state=="Florida" | state=="Missouri" | state=="Montana" | state=="Ohio" | state=="Oregon" | state=="Vermont" | state=="Washington" )
gen inf_adj_state=(state=="Arizona" | state=="Colorado"  | state=="Florida" | state=="Missouri" | state=="Montana" | state=="Ohio" | state=="Oregon" | state=="Vermont" | state=="Washington" )
sort state year
save `path'\FormattedData\May_MW_Data_AllYr, replace
clear

**************************************************************************
********* Combine OES Data, Merge in Task and Minimum Wage Data **********
**************************************************************************
use `path'\FormattedData\temp\OES_data_1999_2009.dta
append using `path'\FormattedData\temp\OES_data_2010_2018.dta

gen description2=proper(description)
drop description
rename description2 description

***Merge in Task Data***
merge m:1 onetsoccode using `path'\RawData\AcemogluAutorTaskData
keep if _merge==3
drop _merge

***Merge in MW Data**
merge m:1 state year using `path'\FormattedData\May_MW_Data_AllYr
keep if _merge==3
drop if inf_adj_state==1
drop _merge


***************************************************************
********* Create Variables to Prep Data for Analysis **********
***************************************************************

*********** Drop Data with Missing Observations **********
drop if nr_cog_anal==. | nr_cog_pers==. | r_cog==. | r_man==. | nr_man_phys==. | nr_man_pers==. 

*********** Create Long Change in Employment and Base-year Employment Variables **********
forvalues j=1999/2018 {
gen temp_emp_`j'=tot_emp if year==`j'
egen tot_emp_`j'=mode(temp_emp_`j'), by(onetsoccode state)
drop temp_emp_`j'
}

gen ln_change_occ_employed3=ln(tot_emp_2002)-ln(tot_emp_1999) if year==2002
gen ln_change_occ_employed4=ln(tot_emp_2003)-ln(tot_emp_1999) if year==2003
gen orig_emp3=tot_emp_1999 if year==2002
gen orig_emp4=tot_emp_1999 if year==2003

forvalues k=2003/2018 {
local j1=(`k'-3)
local j2=(`k'-4)
replace ln_change_occ_employed3=ln(tot_emp_`k')-ln(tot_emp_`j1') if year==`k'
replace ln_change_occ_employed4=ln(tot_emp_`k')-ln(tot_emp_`j2') if `k'>2003 & year==`k'
replace orig_emp3=tot_emp_`j1' if year==`k'
replace orig_emp4=tot_emp_`j2' if year==`k' & `k'>2003
}

gen ln_orig_emp3=ln(orig_emp3)
gen ln_orig_emp4=ln(orig_emp4)


********** This Groups Occupation/States to Wage Intervals ***********
gen ave_state_w2mw=ave_hourly_wage/mw

gen yr_group=(year>=2010)
egen tot_occ_emp=sum(tot_emp), by(onetsoccode state yr_group)
replace tot_occ_emp=. if yr_group==0
gen w_share=tot_emp/tot_occ_emp
gen part_ave_w2mw=ave_state_w2mw*w_share
egen ave2_state_w2mw=sum(part_ave_w2mw), by(onetsoccode state)
drop tot_occ_emp

gen group=0
replace group=1 if  ave2_state_w2mw>=1 & ave2_state_w2mw<1.5
replace group=2 if  ave2_state_w2mw>=1.5 & ave2_state_w2mw<2
replace group=3 if  ave2_state_w2mw>=2 & ave2_state_w2mw<2.5
replace group=4 if  ave2_state_w2mw>=2.5 & ave2_state_w2mw<6

drop if group==0


************* Create Task Share Variables *********************
foreach task_var in "nr_cog_anal" "nr_cog_pers" "r_cog" "r_man" "nr_man_phys" "nr_man_pers" {
egen min_`task_var'=min(`task_var')
gen  `task_var'_renorm= `task_var'-min_`task_var'
}

egen total_task_intensivity=rsum(nr_cog_anal_renorm nr_cog_pers_renorm r_cog_renorm r_man_renorm nr_man_phys_renorm nr_man_pers_renorm)

foreach task_var in "nr_cog_anal" "nr_cog_pers" "r_cog" "r_man" "nr_man_phys" "nr_man_pers" {
gen share_`task_var'= `task_var'_renorm/total_task_intensivity
}

gen share_r=(r_cog_renorm+r_man_renorm)/total_task_intensivity
gen share_int=(nr_man_pers_renorm+nr_cog_pers_renorm)/total_task_intensivity
gen share_nr=(nr_cog_anal_renorm+nr_man_phys_renorm)/total_task_intensivity

****** This standardizes task share variables *****

gen RoutineSh=share_r_cog
gen RoutineSh2=share_r_man
gen RoutineSh3=share_r
gen IntSh=share_int
gen NRSh=share_nr
gen NRCogASh=nr_cog_anal
gen NRManPhysSh=nr_man_phys 

foreach task_var in "RoutineSh" "RoutineSh2" "RoutineSh3" "IntSh" "NRSh" "NRCogASh" "NRManPhysSh"{
egen ave_`task_var'=mean(`task_var'), by(group)
egen sd_`task_var'=sd(`task_var'), by(group)
replace `task_var'=(`task_var'-ave_`task_var')/sd_`task_var'
drop ave_`task_var' sd_`task_var'
}

*** This interacts the task shares with the change in the MW variables ***
foreach task_var in "RoutineSh" "RoutineSh2" "RoutineSh3" "IntSh" "NRSh" "NRCogASh" "NRManPhysSh"{
gen MW_Ch_Next_Yr_X_`task_var'=MW_Ch_Next_Yr*`task_var'
gen MW_Ch_This_Yr_X_`task_var'=MW_Ch_This_Yr*`task_var'
gen MW_Ch_Last_Yr_X_`task_var'=MW_Ch_Last_Yr*`task_var'
gen MW_Ch_2Yrs_Ago_X_`task_var'=MW_Ch_2Yrs_Ago*`task_var'
}


****Create Dummy Variables for the analysis*****
tab year, gen(year_dum)
tab state, gen(state_dum)
tab onetsoccode, gen(occ_dum)
tab group, gen(group_dum)

forvalues k=1/20{
forvalues j=1/42{
gen state_yr`k'_`j'=state_dum`j'*year_dum`k'
}
}

*** This creates wage-group specific variables for change in MW ****
forvalues g=1/4{
gen MW_Ch_Next_Yr_`g'=MW_Ch_Next_Yr*group_dum`g'
gen MW_Ch_This_Yr_`g'=MW_Ch_This_Yr*group_dum`g'
gen MW_Ch_Last_Yr_`g'=MW_Ch_Last_Yr*group_dum`g'
gen MW_Ch_2Yrs_Ago_`g'=MW_Ch_2Yrs_Ago*group_dum`g'

foreach task_var in "RoutineSh" "RoutineSh2" "RoutineSh3" "IntSh" "NRSh" "NRCogASh" "NRManPhysSh"{
gen MW_Ch_Next_Yr_X_`task_var'_`g'=MW_Ch_Next_Yr_X_`task_var'*group_dum`g'
gen MW_Ch_This_Yr_X_`task_var'_`g'=MW_Ch_This_Yr_X_`task_var'*group_dum`g'
gen MW_Ch_Last_Yr_X_`task_var'_`g'=MW_Ch_Last_Yr_X_`task_var'*group_dum`g'
gen MW_Ch_2Yrs_Ago_X_`task_var'_`g'=MW_Ch_2Yrs_Ago_X_`task_var'*group_dum`g'
}
}


***Creates Group-Year specific variables for employment levels and routineness/offshorability of Jobs***
forvalues k=1/4{
forvalues j=1/20{
gen lgr_orig_emp4_`k'_`j'=group_dum`k'*ln(orig_emp4)*year_dum`j'
gen lgr_orig_emp3_`k'_`j'=group_dum`k'*ln(orig_emp3)*year_dum`j'
foreach task_var in "RoutineSh" "RoutineSh2" "RoutineSh3" "IntSh" "NRSh" "NRCogASh" "NRManPhysSh"{
gen gr_`task_var'_`k'_`j'=group_dum`k'*`task_var'*year_dum`j'
}
}
}

sort state onetsoccode year

** Account for unrealistic changes in Personal and Home Care Aide employment in California in the latter period**
gen soi=1
replace soi=0 if description=="Personal And Home Care Aides" & state=="California" & year>=2010

save `path'\FormattedData\OES_Analysis_Data, replace

log close

